{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Comparison of kernel ridge regression and SVR\n\n\nBoth kernel ridge regression (KRR) and SVR learn a non-linear function by\nemploying the kernel trick, i.e., they learn a linear function in the space\ninduced by the respective kernel which corresponds to a non-linear function in\nthe original space. They differ in the loss functions (ridge versus\nepsilon-insensitive loss). In contrast to SVR, fitting a KRR can be done in\nclosed-form and is typically faster for medium-sized datasets. On the other\nhand, the learned model is non-sparse and thus slower than SVR at\nprediction-time.\n\nThis example illustrates both methods on an artificial dataset, which\nconsists of a sinusoidal target function and strong noise added to every fifth\ndatapoint. The first figure compares the learned model of KRR and SVR when both\ncomplexity/regularization and bandwidth of the RBF kernel are optimized using\ngrid-search. The learned functions are very similar; however, fitting KRR is\napprox. seven times faster than fitting SVR (both with grid-search). However,\nprediction of 100000 target values is more than tree times faster with SVR\nsince it has learned a sparse model using only approx. 1/3 of the 100 training\ndatapoints as support vectors.\n\nThe next figure compares the time for fitting and prediction of KRR and SVR for\ndifferent sizes of the training set. Fitting KRR is faster than SVR for medium-\nsized training sets (less than 1000 samples); however, for larger training sets\nSVR scales better. With regard to prediction time, SVR is faster than\nKRR for all sizes of the training set because of the learned sparse\nsolution. Note that the degree of sparsity and thus the prediction time depends\non the parameters epsilon and C of the SVR.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Authors: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>\n# License: BSD 3 clause\n\n\nimport time\n\nimport numpy as np\n\nfrom sklearn.svm import SVR\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import learning_curve\nfrom sklearn.kernel_ridge import KernelRidge\nimport matplotlib.pyplot as plt\n\nrng = np.random.RandomState(0)\n\n# #############################################################################\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\n\n# Add noise to targets\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\n\nX_plot = np.linspace(0, 5, 100000)[:, None]\n\n# #############################################################################\n# Fit regression model\ntrain_size = 100\nsvr = GridSearchCV(SVR(kernel='rbf', gamma=0.1),\n                   param_grid={\"C\": [1e0, 1e1, 1e2, 1e3],\n                               \"gamma\": np.logspace(-2, 2, 5)})\n\nkr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1),\n                  param_grid={\"alpha\": [1e0, 0.1, 1e-2, 1e-3],\n                              \"gamma\": np.logspace(-2, 2, 5)})\n\nt0 = time.time()\nsvr.fit(X[:train_size], y[:train_size])\nsvr_fit = time.time() - t0\nprint(\"SVR complexity and bandwidth selected and model fitted in %.3f s\"\n      % svr_fit)\n\nt0 = time.time()\nkr.fit(X[:train_size], y[:train_size])\nkr_fit = time.time() - t0\nprint(\"KRR complexity and bandwidth selected and model fitted in %.3f s\"\n      % kr_fit)\n\nsv_ratio = svr.best_estimator_.support_.shape[0] / train_size\nprint(\"Support vector ratio: %.3f\" % sv_ratio)\n\nt0 = time.time()\ny_svr = svr.predict(X_plot)\nsvr_predict = time.time() - t0\nprint(\"SVR prediction for %d inputs in %.3f s\"\n      % (X_plot.shape[0], svr_predict))\n\nt0 = time.time()\ny_kr = kr.predict(X_plot)\nkr_predict = time.time() - t0\nprint(\"KRR prediction for %d inputs in %.3f s\"\n      % (X_plot.shape[0], kr_predict))\n\n\n# #############################################################################\n# Look at the results\nsv_ind = svr.best_estimator_.support_\nplt.scatter(X[sv_ind], y[sv_ind], c='r', s=50, label='SVR support vectors',\n            zorder=2, edgecolors=(0, 0, 0))\nplt.scatter(X[:100], y[:100], c='k', label='data', zorder=1,\n            edgecolors=(0, 0, 0))\nplt.plot(X_plot, y_svr, c='r',\n         label='SVR (fit: %.3fs, predict: %.3fs)' % (svr_fit, svr_predict))\nplt.plot(X_plot, y_kr, c='g',\n         label='KRR (fit: %.3fs, predict: %.3fs)' % (kr_fit, kr_predict))\nplt.xlabel('data')\nplt.ylabel('target')\nplt.title('SVR versus Kernel Ridge')\nplt.legend()\n\n# Visualize training and prediction time\nplt.figure()\n\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\nsizes = np.logspace(1, 4, 7).astype(np.int)\nfor name, estimator in {\"KRR\": KernelRidge(kernel='rbf', alpha=0.1,\n                                           gamma=10),\n                        \"SVR\": SVR(kernel='rbf', C=1e1, gamma=10)}.items():\n    train_time = []\n    test_time = []\n    for train_test_size in sizes:\n        t0 = time.time()\n        estimator.fit(X[:train_test_size], y[:train_test_size])\n        train_time.append(time.time() - t0)\n\n        t0 = time.time()\n        estimator.predict(X_plot[:1000])\n        test_time.append(time.time() - t0)\n\n    plt.plot(sizes, train_time, 'o-', color=\"r\" if name == \"SVR\" else \"g\",\n             label=\"%s (train)\" % name)\n    plt.plot(sizes, test_time, 'o--', color=\"r\" if name == \"SVR\" else \"g\",\n             label=\"%s (test)\" % name)\n\nplt.xscale(\"log\")\nplt.yscale(\"log\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Time (seconds)\")\nplt.title('Execution Time')\nplt.legend(loc=\"best\")\n\n# Visualize learning curves\nplt.figure()\n\nsvr = SVR(kernel='rbf', C=1e1, gamma=0.1)\nkr = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)\ntrain_sizes, train_scores_svr, test_scores_svr = \\\n    learning_curve(svr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),\n                   scoring=\"neg_mean_squared_error\", cv=10)\ntrain_sizes_abs, train_scores_kr, test_scores_kr = \\\n    learning_curve(kr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),\n                   scoring=\"neg_mean_squared_error\", cv=10)\n\nplt.plot(train_sizes, -test_scores_svr.mean(1), 'o-', color=\"r\",\n         label=\"SVR\")\nplt.plot(train_sizes, -test_scores_kr.mean(1), 'o-', color=\"g\",\n         label=\"KRR\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Mean Squared Error\")\nplt.title('Learning curves')\nplt.legend(loc=\"best\")\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}